# ---- Study 1: load the survey export and plot the age distribution ----
rm(list = ls()) # Wiping out the global environment
# NOTE(review): hard-coded, machine-specific working directory; every file
# below is read from and written to this folder.
setwd('C:\\Users\\mawal\\OneDrive - Binghamton University\\Desktop\\Desktop_Folders\\Upwork\\Kreps_2')
library(ggplot2)

# Study 1 file converted to csv; the first column is discarded.
df <- read.csv("study_1.csv")[-1]

df <- df[!is.na(df$Q12_1), ]
# Drop rows 1 and 370-377 (positions counted after the NA filter above).
# NOTE(review): presumably header / unusable rows -- confirm against the export.
df <- df[-c(1, 370:377), ]

# Go through as.character() before as.numeric(): when Q12_1 is a factor,
# as.numeric() alone returns the internal level codes, not the ages that were
# entered. That is why the ages previously looked "decreased by 16" and were
# patched with a fixed "+ 16" offset, which is only right while the levels
# happen to be consecutive. Entries that are not numbers become NA.
df$Age <- as.numeric(as.character(df$Q12_1))

summary(df$Age)

# Label values are computed from the data so they cannot drift from the plot.
age_n <- sum(!is.na(df$Age))
age_median <- median(df$Age, na.rm = TRUE)
age_mean <- round(mean(df$Age, na.rm = TRUE))

# annotate() draws each label once; geom_text() with constant x/y/label would
# redraw it for every row of df, giving heavy, jagged text.
Age_study1 <- ggplot(df, aes(x = Age)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Distribution of Age of Respondents") +
  xlab("Age of Respondent") +
  ylab("Frequency Count") +
  annotate("text", x = 55, y = 22, label = paste0("N = ", age_n)) +
  annotate("text", x = 56.8, y = 20, label = paste0("Median = ", age_median)) +
  annotate("text", x = 55.8, y = 18, label = paste0("Mean = ", age_mean))
Age_study1

ggsave("Age_study1.pdf", Age_study1)
  
# ---- Study 1: gender distribution ----
# Respondents who chose "Prefer not to say" are left out of this plot.
df_gender <- df[df$Q17 != "Prefer not to say", ]

# NOTE(review): the numeric comparisons below can only match if Q17 was read
# in as a factor, in which case ifelse() hands back integer level codes for
# the untouched rows. Confirm that 2 and 3 really are Female and Male with
# levels(df$Q17).
df_gender$Q17 <- ifelse(df_gender$Q17 == "Non-binary / third gender", "Non-Binary", df_gender$Q17)
df_gender$Q17 <- ifelse(df_gender$Q17 == 2, "Female", df_gender$Q17)
df_gender$Q17 <- ifelse(df_gender$Q17 == 3, "Male", df_gender$Q17)

# Drop the leftover codes 4 and 5. The two tests must be joined with `&`:
# with `|` the condition is TRUE for every value and nothing is removed.
df_gender <- df_gender[df_gender$Q17 != 4 & df_gender$Q17 != 5, ]

summary(df_gender$Q17)

# Count for the small Non-Binary bar, printed in black at a fixed height
# because the white in-bar count label is not readable there.
n_non_binary <- sum(df_gender$Q17 == "Non-Binary", na.rm = TRUE)

# annotate() draws the extra label once; a constant geom_text() would redraw
# it for every row of df_gender.
gender_study1 <- ggplot(df_gender, aes(x = Q17)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Gender Distribution") +
  xlab("") +
  ylab("Frequency Count") +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, colour = "white") +
  annotate("text", x = "Non-Binary", y = 14, label = n_non_binary, colour = "black")
gender_study1
ggsave("gender_study1.pdf", gender_study1)

summary(df$Q17)

# ---- Study 1: race distribution ----
# Answers containing a comma (several boxes ticked) are pooled as "Mixed".
df$race <- ifelse(grepl(",", df$Q18), "Mixed", df$Q18)

# NOTE(review): the numeric codes below are presumably the factor level codes
# of Q18 -- confirm the mapping with levels(df$Q18).
df$race <- ifelse(df$race == 11, "Hispanic", df$race)
df$race <- ifelse(df$race == 13, "Other", df$race)
df$race <- ifelse(df$race == 15, "White", df$race)
df$race <- ifelse(df$race == 9, "Black", df$race)
df$race <- ifelse(df$race == 6, "Asian", df$race)
df$race <- ifelse(df$race == 2, "American Indian", df$race)

# One row per bar label: category, hand-tuned label height and text colour.
# Counts are computed from the data instead of being typed in by hand.
race_labels <- data.frame(
  race = c("American Indian", "Asian", "Black", "Hispanic", "Mixed", "Other", "White"),
  y = c(17, 17, 17, 30, 30, 17, 220),
  colour = c("black", "white", "white", "black", "black", "black", "white"),
  stringsAsFactors = FALSE
)
race_labels$n <- vapply(
  race_labels$race,
  function(level) sum(df$race == level, na.rm = TRUE),
  integer(1)
)

# The label layer uses its own data frame, so each label is drawn exactly
# once rather than once per respondent.
race_study1 <- ggplot(df, aes(race)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Race Distribution") +
  xlab("") +
  ylab("Frequency Count") +
  geom_text(
    data = race_labels,
    aes(x = race, y = y, label = n),
    colour = race_labels$colour,
    inherit.aes = FALSE
  )
race_study1
ggsave("race_study1.pdf", race_study1)

# ---- Study 1: income distribution ----
# One response is excluded by its id.
# NOTE(review): the reason for excluding this response is not recorded here.
df_inc <- df[df$ResponseId != "R_3hgZoLQUSd4HbAM", ]

# Survey wording -> short axis label, listed in the order the bars should
# appear. The top bracket is labelled "100k more" (no "$") so that it matches
# the other brackets and the study 2 plot.
income_labels <- c(
  "Below $20,000" = "20k less",
  "$20,000 - $39,999" = "20k-40k",
  "$40,000 - $59,999" = "40k-60k",
  "$60,000 - $79,999" = "60k-80k",
  "$80,000 - $99,999" = "80k-100k",
  "$100,000 or more" = "100k more"
)

# Answers that match none of the brackets become NA.
df_inc$inc <- factor(
  unname(income_labels[as.character(df_inc$Q19)]),
  levels = unname(income_labels)
)

# Drop rows without a recognised bracket, as the study 2 plot does, so no
# "NA" bar is drawn.
df_inc <- df_inc[!is.na(df_inc$inc), ]

income_study1 <- ggplot(df_inc, aes(inc)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Income Distribution") +
  xlab("") +
  ylab("Frequency Count") +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, colour = "white")
income_study1
ggsave("income_study1.pdf", income_study1)

# ---- Study 1: education distribution ----
# Fix the order in which the education levels appear on the axis; answers not
# listed here become NA.
# NOTE(review): "Professional degree" sits between "High school graduate" and
# "Some college" -- confirm this ordering is intended.
df$Q20 <- factor(
  df$Q20,
  levels = c(
    "Less than high school", "High school graduate", "Professional degree",
    "Some college", "2 year degree", "4 year degree", "Doctorate"
  )
)

# Count for the shortest bar, printed in black next to it because the white
# in-bar label is not readable there.
n_less_than_hs <- sum(df$Q20 == "Less than high school", na.rm = TRUE)

# Title corrected: this is the education plot, not the income plot.
# annotate() draws the extra label once instead of once per row of df.
educ_study1 <- ggplot(df, aes(Q20)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Education Distribution") +
  xlab("") +
  ylab("Frequency Count") +
  coord_flip() +
  geom_text(aes(label = ..count..), stat = "count", hjust = 1.5, colour = "white") +
  annotate("text", x = "Less than high school", y = 5, label = n_less_than_hs, colour = "black")
educ_study1
ggsave("educ_study1.pdf", educ_study1)


# ---- Study 1: location distribution ----
# Bar chart of Q21 with the count of each bar printed in white inside it.
loc_study1 <- ggplot(data = df, mapping = aes(x = Q21)) +
  geom_bar() +
  geom_text(
    mapping = aes(label = ..count..),
    stat = "count",
    colour = "white",
    vjust = 1.5
  ) +
  labs(title = "Location Distribution", x = "", y = "Frequency Count") +
  theme_classic()
loc_study1
ggsave(filename = "loc_study1.pdf", plot = loc_study1)

#############################################################
# ---- Study 2: load the survey export and plot the age distribution ----
rm(list = ls()) # Wiping out the global environment
# NOTE(review): hard-coded, machine-specific working directory; every file
# below is read from and written to this folder.
setwd('C:\\Users\\mawal\\OneDrive - Binghamton University\\Desktop\\Desktop_Folders\\Upwork\\Kreps_2')
library(ggplot2)

# Study 2 file converted to csv
df <- read.csv("Study_2_Full_Run_Feb_5.csv")

# Drop the first two rows.
# NOTE(review): presumably extra header rows of the export -- confirm.
df <- df[-(1:2), ]
df <- df[!is.na(df$Q12_1), ]

# Go through as.character() before as.numeric(): when Q12_1 is a factor,
# as.numeric() alone returns the internal level codes, not the ages that were
# entered, which is what the former fixed "+ 16" offset was compensating for.
# Entries that are not numbers (including blanks) become NA.
df$Age <- as.numeric(as.character(df$Q12_1))

summary(df$Age)

# Label values are computed from the data so they cannot drift from the plot.
age_n <- sum(!is.na(df$Age))
age_median <- median(df$Age, na.rm = TRUE)
age_mean <- round(mean(df$Age, na.rm = TRUE))

# annotate() draws each label once; geom_text() with constant x/y/label would
# redraw it for every row of df, giving heavy, jagged text.
Age_study2 <- ggplot(df, aes(x = Age)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Distribution of Age of Respondents") +
  xlab("Age of Respondent") +
  ylab("Frequency Count") +
  annotate("text", x = 55.0, y = 48, label = paste0("N = ", age_n)) +
  annotate("text", x = 56.8, y = 44, label = paste0("Median = ", age_median)) +
  annotate("text", x = 55.8, y = 40, label = paste0("Mean = ", age_mean))
Age_study2
ggsave("Age_study2.pdf", Age_study2)

# ---- Study 2: gender distribution ----
# Respondents who chose "Prefer not to say" are left out of this plot.
df_gender <- df[df$Q17 != "Prefer not to say", ]

# NOTE(review): the numeric comparisons below can only match if Q17 was read
# in as a factor, in which case ifelse() hands back integer level codes for
# the untouched rows. Confirm that 3 and 4 really are Female and Male with
# levels(df$Q17).
df_gender$Q17 <- ifelse(df_gender$Q17 == "Non-binary / third gender", "Non-Binary", df_gender$Q17)
df_gender$Q17 <- ifelse(df_gender$Q17 == 3, "Female", df_gender$Q17)
df_gender$Q17 <- ifelse(df_gender$Q17 == 4, "Male", df_gender$Q17)

# Drop the leftover codes 5 and 1. The former `!= 4 | != 5` filter was TRUE
# for every value (and code 4 has already been relabelled "Male"), so it
# removed nothing and has been taken out.
df_gender <- df_gender[df_gender$Q17 != 5 & df_gender$Q17 != 1, ]

summary(df$Q17)

# Count for the small Non-Binary bar, printed in black at a fixed height
# because the white in-bar count label is not readable there.
n_non_binary <- sum(df_gender$Q17 == "Non-Binary", na.rm = TRUE)

# annotate() draws the extra label once; a constant geom_text() would redraw
# it for every row of df_gender.
gender_study2 <- ggplot(df_gender, aes(x = Q17)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Gender Distribution") +
  xlab("") +
  ylab("Frequency Count") +
  geom_text(aes(label = ..count..), stat = "count", vjust = 1.5, colour = "white") +
  annotate("text", x = "Non-Binary", y = 40, label = n_non_binary, colour = "black")
gender_study2
ggsave("gender_study2.pdf", gender_study2)


# ---- Study 2: race distribution ----
# Answers containing a comma (several boxes ticked) are pooled as "Mixed".
df$race <- ifelse(grepl(",", df$Q18), "Mixed", df$Q18)

# NOTE(review): the numeric codes below are presumably the factor level codes
# of Q18 -- confirm the mapping with levels(df$Q18).
df$race <- ifelse(df$race == 18, "Hispanic", df$race)
df$race <- ifelse(df$race == 21, "Other", df$race)
df$race <- ifelse(df$race == 23, "White", df$race)
df$race <- ifelse(df$race == 14, "Black", df$race)
df$race <- ifelse(df$race == 11, "Asian", df$race)
df$race <- ifelse(df$race == 3, "American Indian", df$race)

# df2 is df without the rows whose race is still coded 1; the later study 2
# plots are built from df2 as well.
df2 <- df[df$race != 1, ]

summary(df2$Q18)

# One row per bar label: category, hand-tuned label height and text colour.
# Counts are computed from the data instead of being typed in by hand.
race_labels <- data.frame(
  race = c("American Indian", "Asian", "Black", "Hispanic", "Mixed", "Other", "White"),
  y = c(70, 50, 59, 101, 100, 77, 800),
  colour = c("black", "white", "white", "black", "black", "black", "white"),
  stringsAsFactors = FALSE
)
race_labels$n <- vapply(
  race_labels$race,
  function(level) sum(df2$race == level, na.rm = TRUE),
  integer(1)
)

# The label layer uses its own data frame, so each label is drawn exactly
# once rather than once per respondent.
race_study2 <- ggplot(df2, aes(race)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Race Distribution") +
  xlab("") +
  ylab("Frequency Count") +
  geom_text(
    data = race_labels,
    aes(x = race, y = y, label = n),
    colour = race_labels$colour,
    inherit.aes = FALSE
  )
race_study2
ggsave("race_study2.pdf", race_study2)

# ---- Study 2: income distribution ----
# One response id is excluded before plotting.
df_inc <- df[df$ResponseId != 'R_3hgZoLQUSd4HbAM', ]

# Survey wording -> short axis label, listed in the order the bars appear.
income_labels <- c(
  "Below $20,000" = "20k less",
  "$20,000 - $39,999" = "20k-40k",
  "$40,000 - $59,999" = "40k-60k",
  "$60,000 - $79,999" = "60k-80k",
  "$80,000 - $99,999" = "80k-100k",
  "$100,000 or more" = "100k more"
)

# Answers matching none of the brackets end up as NA in the factor.
df_inc$inc <- factor(
  unname(income_labels[as.character(df_inc$Q19)]),
  levels = unname(income_labels)
)

# Keep only rows with a recognised bracket.
df_inc <- df_inc[!is.na(df_inc$inc), ]

inc_study2 <- ggplot(data = df_inc, mapping = aes(x = inc)) +
  geom_bar() +
  geom_text(
    mapping = aes(label = ..count..),
    stat = "count",
    colour = "white",
    vjust = 1.5
  ) +
  labs(title = "Income Distribution", x = "", y = "Frequency Count") +
  theme_classic()
inc_study2
ggsave(filename = "inc_study2.pdf", plot = inc_study2)

# ---- Study 2: education distribution ----
# Fix the order in which the education levels appear on the axis; answers not
# listed here become NA.
# NOTE(review): "Professional degree" sits between "High school graduate" and
# "Some college" -- confirm this ordering is intended.
df2$Q20 <- factor(
  df2$Q20,
  levels = c(
    "Less than high school", "High school graduate", "Professional degree",
    "Some college", "2 year degree", "4 year degree", "Doctorate"
  )
)

summary(df2$Q20)

# Counts for the two shortest bars, printed in black next to them because the
# white in-bar labels are not readable there.
n_less_than_hs <- sum(df2$Q20 == "Less than high school", na.rm = TRUE)
n_doctorate <- sum(df2$Q20 == "Doctorate", na.rm = TRUE)

# Title corrected: this is the education plot, not the income plot.
# annotate() draws each extra label once instead of once per row of df2.
educ_study2 <- ggplot(df2, aes(Q20)) +
  geom_bar() +
  theme_classic() +
  ggtitle("Education Distribution") +
  xlab("") +
  ylab("Frequency Count") +
  coord_flip() +
  geom_text(aes(label = ..count..), stat = "count", hjust = 2, colour = "white") +
  annotate("text", x = "Less than high school", y = 30, label = n_less_than_hs, colour = "black") +
  annotate("text", x = "Doctorate", y = 30, label = n_doctorate, colour = "black")
educ_study2
ggsave("educ_study2.pdf", educ_study2)


# ---- Study 2: location distribution ----
# Bar chart of Q21 (from df2) with the count of each bar printed in white
# inside it.
loc_study2 <- ggplot(data = df2, mapping = aes(x = Q21)) +
  geom_bar() +
  geom_text(
    mapping = aes(label = ..count..),
    stat = "count",
    colour = "white",
    vjust = 1.5
  ) +
  labs(title = "Location Distribution", x = "", y = "Frequency Count") +
  theme_classic()
loc_study2
ggsave(filename = "loc_study2.pdf", plot = loc_study2)

